
#Treated households:
create_event_data<-function(rawdata,
                            #if no covariates for balancing or stratification, just specify a constant (as follows):
                            covariate_base_stratify=1, #vector of variable names, treatment effects are stratified by their joint unique values.
                            covariate_base_balance=1, #vector of variable names to include in finding exact matches for treated units
                            base_restrict = 1, #a single var, restricting to households for which var == 1 in base period,
                            timevar,
                            unitvar,
                            cohortvar, #period when the particular event of interest arises
                            anycohortvar = NULL, #period when ANY kind of event arises, used to find control cohorts
                            #(for whom no event of any kind has yet to happen)
                            #Only specify if it differs from cohortvar
                            base_time = -1, #reference value for event time; the period from which differences over time are taken
                            onset_minimum=-Inf, #Drop treated guys whose onset happens BEFORE this time period
                            onset_maximum=Inf, #Drop treated guys whose onset happens AFTER this time period
                            never_treat_action = "both", #options: "both", "only", and "exclude"
                            #Both: include both never-treated and later-treated units as controls
                            #Only: use only never-treated units as controls
                            #Exclude: use only later-treated units as controls
                            lower_event_time = -Inf, #Earliest period (relative to treatment time) for which to estimate effects
                            upper_event_time = Inf, #Latest period (relative to treatment time) for which to estimate effects
                            balanced_panel = FALSE, #If TRUE, keep only units observed over full interval [lower_event_time, upper_event_time]
                            stratify_balance_val = NA,
                            cohortbin=1,
                            balancevars_stratbal=1) 
{
  
  if(lower_event_time > base_time) stop("lower_event_time must lie below base_time")
  if(!is.data.table(rawdata)) stop("rawdata must be a data.table")
  maindata<-copy(rawdata)
  if(is.null(anycohortvar)) {
    maindata[,anycohortvar:=get(cohortvar)]
    anycohortvar<-"anycohortvar"
  }
  if(is.numeric(base_time)) maindata[,base_time:=base_time]
  else {
    setnames(maindata,base_time,"base_time")
    maindata[,base_time_orig:=base_time]
  }
  setnames(maindata,c(timevar,unitvar,cohortvar,anycohortvar),c("time","id","cohort","anycohort"))
  if(base_restrict != 1) setnames(maindata,base_restrict,"base_restrict")
  
  if(any(is.na(maindata$cohort))) stop("cohort variable should not be missing (it can be infinite instead)")
  if(any(is.na(maindata$anycohort))) stop("anycohort variable should not be missing (it can be infinite instead)")
  
  
  treatdata<-copy(maindata[cohort>=onset_minimum  & ! is.infinite(cohort) & cohort == anycohort ,])
  treatdata[,treated:=1]
  treatdata[,event_time:=time-cohort]
  treatdata<-treatdata[event_time >= lower_event_time & event_time <= upper_event_time,]
  
  #stacking control cohorts.
  #I assume people who never suffer the event have a value cohort = Inf
  controldata<-NULL
  for(o in unique(treatdata$cohort)){
    if(never_treat_action=="both") controlcohort <- maindata[(anycohort > o | is.infinite(anycohort)) ,]
    if(never_treat_action=="exclude") controlcohort <- maindata[(anycohort > o & !is.infinite(anycohort)) ,]
    if(never_treat_action=="only") controlcohort <- maindata[is.infinite(anycohort) ,]
    
    controlcohort[,cohort := o]
    controlcohort[,event_time := time - cohort]
    
    #Make sure people in the control cohort are actually observed in that period
    #(to verify they don't belong to the cohort)
    controlcohort[ ,obscohort := max(time == o),by=id]
    controlcohort<-controlcohort[obscohort==1,]
    controlcohort[,obscohort:=NULL]
    #drop someone from the control cohort when they get treated:
    controlcohort<-controlcohort[anycohort - cohort > event_time ,]
    
    controldata<-rbind(controldata,controlcohort)
  }
  controldata[,treated:=0]
  controldata<-controldata[event_time >= lower_event_time & event_time <= upper_event_time,]
  
  #constructing the regression dataset, stacking each non-base year 
  #with a copy of the base year observation.
  treatdata[,obsbase:=sum(event_time==base_time),by=id]
  controldata[,obsbase:=sum(event_time==base_time),by=.(id,cohort)]
  
  #If base_time varies across units, reassigning it to a common reference value:
  #This is relevant, for instance, with a dataset that moves from annual to bi-annual
  treatdata[event_time == base_time,event_time :=max(base_time)]
  treatdata[,base_time :=max(base_time)]
  controldata[event_time == base_time,event_time :=max(base_time)]
  controldata[,base_time :=max(base_time)]
  
  treatdata[,obscount:=1]
  controldata[,obscount:=1]
  treatdata[,obscount:=sum(obscount),by=id]
  controldata[,obscount:=sum(obscount),by=.(id,cohort)]
  
  if(is.character(covariate_base_stratify)){
    for(out in covariate_base_stratify){
      controldata[,eval(out) := min(get(out) + 9e9 *(event_time != base_time)), by=.(id,cohort)]
      controldata[get(out) >= 9e9,eval(out) := NA, ]
      controldata[,eval(out) := as.factor(get(out))]
      treatdata[,eval(out) := min(get(out) + 9e9 *(event_time != base_time)), by=.(id,cohort)]
      treatdata[get(out) >= 9e9,eval(out) := NA, ]
      treatdata[,eval(out) := as.factor(get(out))]
    }
  }
  if(is.character(covariate_base_balance)){
    for(out in covariate_base_balance){
      controldata[,eval(out) := min(get(out) + 9e9 *(event_time != base_time)), by=.(id,cohort)]
      controldata[get(out) >= 9e9,eval(out) := Inf, ]
      controldata[,eval(out) := as.factor(get(out))]
      treatdata[,eval(out) := min(get(out) + 9e9 *(event_time != base_time)), by=.(id,cohort)]
      treatdata[get(out) >= 9e9,eval(out) := Inf, ]
      treatdata[,eval(out) := as.factor(get(out))]
    }
  }
  controldata[,base_restrict := max(base_restrict * (event_time == base_time), na.rm=TRUE),by=.(id, cohort)]
  controldata <- controldata[base_restrict == 1,]
  treatdata[,base_restrict := max(base_restrict * (event_time == base_time), na.rm=TRUE),by=.(id, cohort)]
  treatdata <- treatdata[base_restrict == 1,]
  
  event_times<-treatdata[,unique(event_time)]
  eventdata<-NULL
  #For the final dataset, we must stack each pairwise combo of (base year, other year)
  #for each household. The reason? We will assign control households weights that vary
  #based on the other year, as households enter/exit the sample.
  if(balanced_panel==FALSE){
    for(t in event_times){
      treatdata[,obst:=sum(event_time==t),by=id]
      controldata[,obst:=sum(event_time==t),by=.(id,cohort)]
      
      pairdata<-rbind(treatdata[obsbase==1 & obst==1 & base_time != t & (event_time == t | event_time == base_time),],
                      controldata[obsbase==1 & obst==1 & base_time != t & (event_time == t | event_time == base_time),])
      pairdata[,time_pair:= t]
      eventdata<-rbind(eventdata,
                       pairdata)
    }
  }
  eventdata[,obst:=NULL]
  eventdata[,obsbase:=NULL]
  eventdata[,anycohort:=NULL]
  #This stacking is unnecessary if we restrict to a balanced panel. Then no one enters
  #or exits the panel, and weights are fixed at the household level.
  if(balanced_panel==TRUE){
    eventdata<-rbind(eventdata,
                     treatdata[obscount== max(treatdata$obscount),],
                     controldata[obscount>=max(treatdata$obscount),]
    )
    
  }
  
  eventdata[,post:=event_time >= 0]
  eventdata[,id:=as.factor(id)]
  eventdata[,cohort:=as.factor(cohort)]  
  eventdata[,event_time:=as.factor(event_time)]
  eventdata[,time_pair:=as.factor(time_pair)]
  eventdata[,time:=as.factor(time)]
  
  basefactor<-unique(eventdata[event_time==base_time,event_time])
  basefactor<-basefactor[length(basefactor)]
  eventdata[,event_time:=relevel(event_time,ref=as.character(basefactor))]
  
  
  
  #calculating weights so that controls match treated households on characteristics
  #Note: this may have a lot of fixed effects, and may need to be broken down into 
  #multiple smaller regressions:
  if(is.character(covariate_base_stratify)) eventdata[,stratify:=interaction(eventdata[,covariate_base_stratify,with=FALSE])]
  else eventdata[,stratify:=factor(1,levels=c(1,"OMIT"))]
  if(is.character(covariate_base_balance)) eventdata[,balancevars:=interaction(eventdata[,covariate_base_balance,with=FALSE])]
  else eventdata[,balancevars:=factor(1,levels=c(1,"OMIT"))]
  eventdata[,pval:=feols(treated ~ 1 | interaction(cohort,event_time,time_pair,stratify,balancevars),
                         data = eventdata)$fitted.values]
  
  eventdata[treated==1 & pval < 1 & pval > 0,pweight:=1]
  eventdata[treated==0 & pval < 1 & pval > 0,pweight:=pval/(1-pval)]
    eventdata[,pval:=NULL]
  
  #calculating weights to match control and treated households on characteristics, across strata
    if(!is.na(stratify_balance_val)){
      eventdata[,compcohort:=as.numeric(as.character(cohort))]
      eventdata[,compcohort:=round((compcohort)/cohortbin,0)*cohortbin]
    eventdata[,treated_base := treated == 1 & stratify == stratify_balance_val]
    

    stratvals<-levels(eventdata$stratify)
    for(strat in stratvals){
      if(strat != stratify_balance_val){
        stratbalmodel <- feols(treated_base ~ 1 | interaction(compcohort,event_time,time_pair,balancevars_stratbal),
              data = eventdata[(treated == 1 & stratify == stratify_balance_val) | 
                                 (treated == 1 & stratify == strat),],
              weights = eventdata[(treated == 1 & stratify == stratify_balance_val) | 
                                    (treated == 1 & stratify == strat),pweight])
        eventdata[, eval(paste0("pval_1",strat)) := predict(stratbalmodel,
                                                                   eventdata)]
        
        eventdata[treated == 1 & stratify == strat, pweight_stratbal := pweight*get(paste0("pval_1",strat))/(1-get(paste0("pval_1",strat)))]
      }
      else    eventdata[treated == 1 & stratify == strat, pweight_stratbal := pweight]

      stratbalmodel <- feols(treated_base ~ 1 | interaction(compcohort,event_time,time_pair,balancevars,balancevars_stratbal),
                             data = eventdata[(treated == 1 & stratify == stratify_balance_val) | 
                                                (treated == 0 & stratify == strat),],
                             weights = eventdata[(treated == 1 & stratify == stratify_balance_val) | 
                                                   (treated == 0 & stratify == strat),pweight])
      
      eventdata[, eval(paste0("pval_0",strat)) := predict(stratbalmodel,
                                                                 eventdata)]
      eventdata[treated == 0 & stratify == strat, pweight_stratbal := pweight*get(paste0("pval_0",strat))/(1-get(paste0("pval_0",strat)))]
      

    }
    
      checkvars <- names(eventdata)[grepl("pval_",names(eventdata))]
      for(var in checkvars){
      eventdata[get(var) == 1 | get(var) == 0 | is.na(get(var)),  pweight_stratbal := NA ]
      }
    }
  
  eventdata[,treated:=as.factor(treated)]
  
  return(eventdata)
}


event_ATTs<-function(eventdata,
                     outcomes,#vector of variable names
                     clustervar="id", 
                     weights="pweight"){
  #These regressions should work identically if the fixed effects (after the "|") were replaced with:
  # interaction(time_pair,id,cohort)
  eventdata[,treated_post := as.factor((treated == 1) * (post == 1))]
  eventdata[,treated_pre := as.factor((treated == 1) * (post == 0))]
  eventdata[,treated_event_time := event_time]
  eventdata[treated==0,treated_event_time := 1] #1 is the base level
  
  eventdata[,event_time_stratify:=interaction(event_time,stratify)]
  eventdata[,treated_post_stratify := interaction(treated_post,stratify)]
  eventdata[,treated_pre_stratify := interaction(treated_pre,stratify)]
  eventdata[treated_pre==0,treated_pre_stratify := 1]
  eventdata[event_time==base_time,treated_pre_stratify := 1]
  
  eventdata[,unitfe := interaction(time_pair,id,treated,cohort,stratify)]
  
  
  eventdata[,treated_event_time_stratify := interaction(event_time,stratify)]
  
  
  #Omitting base year for all levels of --stratify--:
  eventdata[event_time==base_time,event_time_stratify := paste0(c(max(eventdata$base_time),1),collapse=".")]
  eventdata[,event_time_stratify:=relevel(event_time_stratify,ref = paste0(c(max(eventdata$base_time),1),collapse="."))]
  
  #Omitting base year for all levels of --stratify--, for treated people
  eventdata[treated == 0 ,treated_event_time_stratify := paste0(c(max(eventdata$base_time),1),collapse=".")]
  eventdata[event_time==base_time,treated_event_time_stratify := paste0(c(max(eventdata$base_time),1),collapse=".")]
  eventdata[,treated_event_time_stratify:=relevel(treated_event_time_stratify,ref = paste0(c(max(eventdata$base_time),1),collapse="."))]
  
  
  #Omitting effect for untreated people or observations in pre-period:
  eventdata[treated_post == 0 ,treated_post_stratify := paste0(c(0,1),collapse=".")]
  eventdata[,treated_post_stratify:=relevel(treated_post_stratify,ref = paste0(c(0,1),collapse="."))]
  
  
  results<-feols(as.formula(paste0("c(",
                                   paste0(outcomes,collapse=",") , ") ~ event_time_stratify + treated_event_time_stratify | unitfe"
  )),
  data = eventdata,
  weights= eventdata[,get(weights)],
  cluster=clustervar)
  
  results_pooled<-feols(as.formula(paste0("c(",
                                          paste0(outcomes,collapse=",") , 
                                          ") ~ event_time_stratify + treated_pre_stratify + treated_post_stratify | unitfe"
  )),
  data = eventdata,
  weights= eventdata[,get(weights)],
  cluster=clustervar)
  
  results_means<-feols(as.formula(paste0("c(",
                                         paste0(outcomes,collapse=",") , 
                                         ") ~ interaction(stratify) - 1"
  )),
  data = eventdata[treated == 1 & event_time == base_time & get(weights) == 1,],
  cluster=clustervar)
  
  return(list(pooled = results_pooled,
              dynamic = results,
              means = results_means))
}

print_ATTs<-function(results, 
                     vars,
                     varnames,
                     stratify_values=NULL,
                     event_name="Disability Onset",
                     base_time = NULL,
                     plot_name,
                     decimals=0,
                     plot_pval=0.05){


    p<-1
    for(outcome in vars){
      pos <- which(gsub("lhs: ", "", names(results$pooled))==outcome)

      plot_table<-data.table(
        coef = results$dynamic[[pos]]$coefficients[
          grepl("treated_event_time_stratify",names(results$dynamic[[pos]]$coefficients))
        ],
        se = results$dynamic[[pos]]$se[
          grepl("treated_event_time_stratify",names(results$dynamic[[pos]]$se))
        ],
        event_time = as.numeric(as.character(gsub("\\..*","", 
                                                  gsub("treated_event_time_stratify", "",
                                                       names(results$dynamic[[pos]]$coefficients)[grepl("treated_event_time_stratify",names(results$dynamic[[pos]]$se))])
        ))),
        stratify_value = as.numeric(as.character(gsub(".*\\.","", 
                                                      gsub("treated_event_time_stratify", "",
                                                           names(results$dynamic[[pos]]$coefficients)[grepl("treated_event_time_stratify",names(results$dynamic[[pos]]$se))])
        )))
      )
      #adding in omitted year:
      #Looking for the first missing period in the interval of plotted periods
      #If the interval of plotted periods has no gap, I assume refernce period is first
      #period in the data.
      if(is.null(base_time)){ 
        missingperiods<-(min(plot_table$event_time):max(plot_table$event_time))[!(min(plot_table$event_time):max(plot_table$event_time))%in%unique(plot_table$event_time)]
        if(length(missingperiods)==0) refperiod <- min(plot_table$event_time)-1
        else refperiod <- min(missingperiods)
      }
      else refperiod <- base_time
      plot_table<-rbind(plot_table, data.table(coef=0,
                                               se = 0,
                                               event_time = refperiod,
                                               stratify_value = unique(plot_table$stratify_value)
      ))
      
      plot_table[,upper:= coef + abs(qt(plot_pval/2,
                                        df = results$dynamic[[pos]]$nobs - results$dynamic[[pos]]$nparams))*se]
      plot_table[,lower:= coef - abs(qt(plot_pval/2,
                                        df = results$dynamic[[pos]]$nobs - results$dynamic[[pos]]$nparams))*se]
      pdf(paste0("./",paste(c(plot_name,outcome),collapse="_"),".pdf"))
      for(stratval in stratify_values){
        print(ggplot(data=plot_table[stratify_value==stratval,]) + 
                geom_ribbon(aes(ymin = lower, ymax=upper, x=event_time), fill="grey50", alpha=0.5) + 
                geom_hline(yintercept = 0, linetype="dashed") + 
                geom_vline(xintercept = 0, linetype="dashed") + 
                geom_line(aes(x = event_time,
                              y = coef)) +
                scale_x_continuous( breaks = pretty_breaks(12)) +
                ylim(c(min(plot_table$lower),max(plot_table$upper))) +
                labs(y=varnames[p], x = paste0("Years from ",event_name))
        )
      }
      dev.off()
      p<-p+1
    }

}


create_table_row<-function(table_results, vargroupnames, varpos, namepos, pos){
  if("treated_event_time_stratify0.0" %in%names(table_results$dynamic[[pos]]$coefficients)){
    coef00 <- table_results$dynamic[[pos]]$coefficients[["treated_event_time_stratify0.0"]]
    se00 <- table_results$dynamic[[pos]]$se[["treated_event_time_stratify0.0"]]
    p00 <- ifelse(is.na(table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify0.0","Pr(>|t|))"]),0,table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify0.0","Pr(>|t|))"])
  }
  else{
    coef00 <- "---"
    se00<- NA
    p00 <- 1
  }
  if("treated_event_time_stratify0.1" %in%names(table_results$dynamic[[pos]]$coefficients)) {
    coef01 <- table_results$dynamic[[pos]]$coefficients[["treated_event_time_stratify0.1"]]
    se01 <- table_results$dynamic[[pos]]$se[["treated_event_time_stratify0.1"]]
    p01 <- ifelse(is.na(table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify0.1","Pr(>|t|))"]),0,table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify0.1","Pr(>|t|))"])
  }
  else{
    coef01 <- "---"
    se01 <- NA
    p01 <- 1
  }
  if(is.numeric(coef00) & is.numeric(coef01)){
    coefdiff0 <- coef01 - coef00
    pdiff0 <- 2*pt(ifelse(is.na((abs(coef01 - coef00))/((se01^2 + se00^2)^0.5)),
                          0,
                          (abs(coef01 - coef00))/((se01^2 + se00^2)^0.5)),
                   df = table_results$dynamic[[pos]]$nobs -table_results$dynamic[[pos]]$nparams,
                   lower.tail=FALSE)
    sediff0 <- (se01^2 + se00^2)^0.5
  }
  else {
    coefdiff0 <- "---"
    sediff0 <- NA
    pdiff0 <- 1
  }
  if("treated_event_time_stratify1.0" %in%names(table_results$dynamic[[pos]]$coefficients)) {
    coef10 <- table_results$dynamic[[pos]]$coefficients[["treated_event_time_stratify1.0"]]
    se10 <- table_results$dynamic[[pos]]$se[["treated_event_time_stratify1.0"]]
    p10 <- ifelse(is.na(table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify1.0","Pr(>|t|))"]),0,table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify1.0","Pr(>|t|))"])
  }
  else {
    coef10 <- "---"
    se10 <- NA
    p10 <- 1
  }
  if("treated_event_time_stratify1.1" %in%names(table_results$dynamic[[pos]]$coefficients)) {
    coef11 <- table_results$dynamic[[pos]]$coefficients[["treated_event_time_stratify1.1"]]
    se11 <- table_results$dynamic[[pos]]$se[["treated_event_time_stratify1.1"]]
    p11 <- ifelse(is.na(table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify1.1","Pr(>|t|))"]),0,table_results$dynamic[[pos]]$coeftable["treated_event_time_stratify1.1","Pr(>|t|))"])
  }
  else{
    coef11 <- "---"
    se11 <- NA
    p11 <- 1
  }
  if(is.numeric(coef10) & is.numeric(coef11)){
    coefdiff1 <- coef11 - coef10
    pdiff1 <- 2*pt(ifelse(is.na(abs(coef11 - coef10)/((se11^2 + se10^2)^0.5)),0,
                          abs(coef11 - coef10)/((se11^2 + se10^2)^0.5)),
                   df = table_results$dynamic[[pos]]$nobs -table_results$dynamic[[pos]]$nparams,
                   lower.tail=FALSE)
    sediff1 <- (se11^2 + se10^2)^0.5
  }
  else {
    coefdiff1 <- "---"
    sediff1 <- NA
    pdiff1 <- 1
  }
  if(is.numeric(coef00) & is.numeric(coef01) & is.numeric(coef10) & is.numeric(coef11)) {
    TR(vargroupnames[[varpos]][namepos])%:%
      TR(coef00, pvalues = p00, dec=decimals)%:%
      TR(coef01, pvalues = p01, dec=decimals)%:%
      TR(coefdiff0, pvalues = pdiff0, dec=decimals)%:%
      TR(coef10, pvalues = p10, dec=decimals)%:%
      TR(coef11, pvalues = p11, dec=decimals)%:%
      TR(coefdiff1, pvalues = pdiff1, dec=decimals) +
      TR("")%:%
      TR(se00, se = TRUE)%:%
      TR(se01, se = TRUE)%:%
      TR(sediff0, se = TRUE)%:%
      TR(se10, se = TRUE)%:%
      TR(se11, se = TRUE)%:%
      TR(sediff1, se = TRUE)
  }
  else if(is.numeric(coef00) & is.numeric(coef01)){
    TR(vargroupnames[[varpos]][namepos])%:%
      TR(coef00, pvalues = p00, dec=decimals)%:%
      TR(coef01, pvalues = p01, dec=decimals)%:%
      TR(coefdiff0, pvalues = pdiff0, dec=decimals)%:%
      TR(coef10)%:%
      TR(coef11)%:%
      TR(coefdiff1) +
      TR("")%:%
      TR(se00, se = TRUE)%:%
      TR(se01, se = TRUE)%:%
      TR(sediff0, se = TRUE)%:%
      TR(se10, se = TRUE)%:%
      TR(se11, se = TRUE)%:%
      TR(sediff1, se = TRUE)
  }
  else {
    TR(vargroupnames[[varpos]][namepos])%:%
      TR(coef00)%:%
      TR(coef01)%:%
      TR(coefdiff0)%:%
      TR(coef10)%:%
      TR(coef11)%:%
      TR(coefdiff1) +
      TR("")%:%
      TR(se00, se = TRUE)%:%
      TR(se01, se = TRUE)%:%
      TR(sediff0, se = TRUE)%:%
      TR(se10, se = TRUE)%:%
      TR(se11, se = TRUE)%:%
      TR(sediff1, se = TRUE)
  }

}
